import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import preprocessing
from data_load import get_clean_data,normalize_1_variables,normalize_2_variables,normalize_3_variables
# Configure the seaborn theme in a single call.
# Every sns.set() call re-applies the complete theme and resets each argument
# that is not passed back to its default, so separate calls do not accumulate:
# a trailing sns.set(font_scale=2) on its own would silently revert the style
# to the default. Combining the options keeps the "ticks" style, the colour
# codes and the enlarged fonts active together.
sns.set(style="ticks", color_codes=True, font_scale=2)
%%javascript
// Set the notebook output area's auto-scroll threshold to 9999.
// NOTE(review): presumably this keeps long outputs (many figures) from being
// collapsed into a scroll box - confirm against the Jupyter front end in use.
IPython.OutputArea.auto_scroll_threshold = 9999;
# another cell
import matplotlib.pyplot as plt
%matplotlib inline
for i in range(10):
plt.plot(range(10))
plt.show()
# Load the data set from quiz_data.csv through the project's get_clean_data
# helper (the meaning of the second argument is defined in data_load) and
# show which columns are available.
data = get_clean_data('quiz_data.csv', True)
print(data.columns)

# Columns to summarise, in display order.
summary_columns = (
    'program',
    'happy',
    'problem_type',
    'creative',
    # 'industry',  # summary was disabled (commented out) in the original code
    'outdoors',
    'career',
    'group_work',
    'liked_courses',
    'disliked_courses',
    'programming',
    'join_clubs',
    'not_clubs',
    'liked_projects',
    'disliked_projects',
    'tv_shows',
    'alternate_degree',
    'expensive_equipment',
    'drawing',
    'essay',
)

# One figure per column: absolute value counts on the left axis, the same
# counts expressed as a percentage of all rows on the right axis.
for column in summary_columns:
    print("Summary of the variable: " + column)
    fig, axs = plt.subplots(1, 2)
    fig.suptitle(column)
    data[column].value_counts().plot(kind='bar',
                                     figsize=(14, 8),
                                     title="Frequency",
                                     ax=axs[0])
    (data[column].value_counts(normalize=True) * 100).plot(kind='bar',
                                                            figsize=(14, 8),
                                                            title="Percent",
                                                            ax=axs[1])
# Per-program summary for 'mech': a grid with one row per survey column,
# value counts on the left and percentages on the right.
program_name = 'mech'
data = get_clean_data('quiz_data.csv', True)
print("Summary of the Program: " + program_name)

# Columns plotted for each program, one subplot row each (top to bottom).
program_columns = (
    'happy',
    'problem_type',
    'creative',
    'outdoors',
    'career',
    'group_work',
    'liked_courses',
    'disliked_courses',
    'programming',
    'join_clubs',
    'not_clubs',
    'liked_projects',
    'disliked_projects',
    'tv_shows',
    'alternate_degree',
    'expensive_equipment',
    'essay',
)

# Filter into a separate name so `data` still holds the full data set once
# this section has run; no second CSV load is needed to restore it.
# NOTE(review): assumes get_clean_data is a plain loader without side effects.
program_data = data[data.program == program_name]

fig, axes = plt.subplots(nrows=len(program_columns), ncols=2)
fig.suptitle(program_name)
for row, column in enumerate(program_columns):
    program_data[column].value_counts().plot(kind='bar',
                                             figsize=(14, 8),
                                             title=column + "Frequency",
                                             ax=axes[row][0])
    (program_data[column].value_counts(normalize=True) * 100).plot(
        kind='bar',
        figsize=(14, 8),
        title=column + "Percent",
        ax=axes[row][1])

# `top` is a fraction of the figure height, so 10 places the top edge of the
# grid ten figure-heights up.
# NOTE(review): presumably a way to give all rows enough room - confirm.
top = 10  # the top of the subplots of the figure
hspace = 1.2  # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top, hspace=hspace)
# Per-program summary for 'bmed': a grid with one row per survey column,
# value counts on the left and percentages on the right.
program_name = 'bmed'
data = get_clean_data('quiz_data.csv', True)
print("Summary of the Program: " + program_name)

# Columns plotted for each program, one subplot row each (top to bottom).
program_columns = (
    'happy',
    'problem_type',
    'creative',
    'outdoors',
    'career',
    'group_work',
    'liked_courses',
    'disliked_courses',
    'programming',
    'join_clubs',
    'not_clubs',
    'liked_projects',
    'disliked_projects',
    'tv_shows',
    'alternate_degree',
    'expensive_equipment',
    'essay',
)

# Filter into a separate name so `data` still holds the full data set once
# this section has run; no second CSV load is needed to restore it.
# NOTE(review): assumes get_clean_data is a plain loader without side effects.
program_data = data[data.program == program_name]

fig, axes = plt.subplots(nrows=len(program_columns), ncols=2)
fig.suptitle(program_name)
for row, column in enumerate(program_columns):
    program_data[column].value_counts().plot(kind='bar',
                                             figsize=(14, 8),
                                             title=column + "Frequency",
                                             ax=axes[row][0])
    (program_data[column].value_counts(normalize=True) * 100).plot(
        kind='bar',
        figsize=(14, 8),
        title=column + "Percent",
        ax=axes[row][1])

# `top` is a fraction of the figure height, so 10 places the top edge of the
# grid ten figure-heights up.
# NOTE(review): presumably a way to give all rows enough room - confirm.
top = 10  # the top of the subplots of the figure
hspace = 1.2  # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top, hspace=hspace)
# Per-program summary for 'sft': a grid with one row per survey column,
# value counts on the left and percentages on the right.
program_name = 'sft'
data = get_clean_data('quiz_data.csv', True)
print("Summary of the Program: " + program_name)

# Columns plotted for each program, one subplot row each (top to bottom).
program_columns = (
    'happy',
    'problem_type',
    'creative',
    'outdoors',
    'career',
    'group_work',
    'liked_courses',
    'disliked_courses',
    'programming',
    'join_clubs',
    'not_clubs',
    'liked_projects',
    'disliked_projects',
    'tv_shows',
    'alternate_degree',
    'expensive_equipment',
    'essay',
)

# Filter into a separate name so `data` still holds the full data set once
# this section has run; no second CSV load is needed to restore it.
# NOTE(review): assumes get_clean_data is a plain loader without side effects.
program_data = data[data.program == program_name]

fig, axes = plt.subplots(nrows=len(program_columns), ncols=2)
fig.suptitle(program_name)
for row, column in enumerate(program_columns):
    program_data[column].value_counts().plot(kind='bar',
                                             figsize=(14, 8),
                                             title=column + "Frequency",
                                             ax=axes[row][0])
    (program_data[column].value_counts(normalize=True) * 100).plot(
        kind='bar',
        figsize=(14, 8),
        title=column + "Percent",
        ax=axes[row][1])

# `top` is a fraction of the figure height, so 10 places the top edge of the
# grid ten figure-heights up.
# NOTE(review): presumably a way to give all rows enough room - confirm.
top = 10  # the top of the subplots of the figure
hspace = 1.2  # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top, hspace=hspace)
# Per-program summary for 'ce': a grid with one row per survey column,
# value counts on the left and percentages on the right.
program_name = 'ce'
data = get_clean_data('quiz_data.csv', True)
print("Summary of the Program: " + program_name)

# Columns plotted for each program, one subplot row each (top to bottom).
program_columns = (
    'happy',
    'problem_type',
    'creative',
    'outdoors',
    'career',
    'group_work',
    'liked_courses',
    'disliked_courses',
    'programming',
    'join_clubs',
    'not_clubs',
    'liked_projects',
    'disliked_projects',
    'tv_shows',
    'alternate_degree',
    'expensive_equipment',
    'essay',
)

# Filter into a separate name so `data` still holds the full data set once
# this section has run; no second CSV load is needed to restore it.
# NOTE(review): assumes get_clean_data is a plain loader without side effects.
program_data = data[data.program == program_name]

fig, axes = plt.subplots(nrows=len(program_columns), ncols=2)
fig.suptitle(program_name)
for row, column in enumerate(program_columns):
    program_data[column].value_counts().plot(kind='bar',
                                             figsize=(14, 8),
                                             title=column + "Frequency",
                                             ax=axes[row][0])
    (program_data[column].value_counts(normalize=True) * 100).plot(
        kind='bar',
        figsize=(14, 8),
        title=column + "Percent",
        ax=axes[row][1])

# `top` is a fraction of the figure height, so 10 places the top edge of the
# grid ten figure-heights up.
# NOTE(review): presumably a way to give all rows enough room - confirm.
top = 10  # the top of the subplots of the figure
hspace = 1.2  # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top, hspace=hspace)
# Per-program summary for 'tron': a grid with one row per survey column,
# value counts on the left and percentages on the right.
program_name = 'tron'
data = get_clean_data('quiz_data.csv', True)
print("Summary of the Program: " + program_name)

# Columns plotted for each program, one subplot row each (top to bottom).
program_columns = (
    'happy',
    'problem_type',
    'creative',
    'outdoors',
    'career',
    'group_work',
    'liked_courses',
    'disliked_courses',
    'programming',
    'join_clubs',
    'not_clubs',
    'liked_projects',
    'disliked_projects',
    'tv_shows',
    'alternate_degree',
    'expensive_equipment',
    'essay',
)

# Filter into a separate name so `data` still holds the full data set once
# this section has run; no second CSV load is needed to restore it.
# NOTE(review): assumes get_clean_data is a plain loader without side effects.
program_data = data[data.program == program_name]

fig, axes = plt.subplots(nrows=len(program_columns), ncols=2)
fig.suptitle(program_name)
for row, column in enumerate(program_columns):
    program_data[column].value_counts().plot(kind='bar',
                                             figsize=(14, 8),
                                             title=column + "Frequency",
                                             ax=axes[row][0])
    (program_data[column].value_counts(normalize=True) * 100).plot(
        kind='bar',
        figsize=(14, 8),
        title=column + "Percent",
        ax=axes[row][1])

# `top` is a fraction of the figure height, so 10 places the top edge of the
# grid ten figure-heights up.
# NOTE(review): presumably a way to give all rows enough room - confirm.
top = 10  # the top of the subplots of the figure
hspace = 1.2  # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top, hspace=hspace)
# Per-program summary for 'cive': reload the data set, keep only the 'cive'
# rows and start filling a 17 x 2 grid of bar charts (value counts on the
# left, percentages on the right).
data = get_clean_data('quiz_data.csv', True)
print("Summary of the Program: cive")
# `data` is narrowed in place on purpose: the statements that follow this
# block keep reading the filtered frame (and `axes`) through these names.
data = data[data.program == 'cive']
fig, axes = plt.subplots(nrows=17, ncols=2)
fig.suptitle('cive')

# Columns drawn here, one grid row each starting at row 0; the 'essay'
# column is plotted by the code after this block.
cive_columns = (
    'happy',
    'problem_type',
    'creative',
    'outdoors',
    'career',
    'group_work',
    'liked_courses',
    'disliked_courses',
    'programming',
    'join_clubs',
    'not_clubs',
    'liked_projects',
    'disliked_projects',
    'tv_shows',
    'alternate_degree',
    'expensive_equipment',
)
for row_index, column_name in enumerate(cive_columns):
    frequency_ax = axes[row_index][0]
    percent_ax = axes[row_index][1]
    data[column_name].value_counts().plot(kind='bar',
                                          figsize=(14, 8),
                                          title=column_name + "Frequency",
                                          ax=frequency_ax)
    (data[column_name].value_counts(normalize=True) * 100).plot(
        kind='bar',
        figsize=(14, 8),
        title=column_name + "Percent",
        ax=percent_ax)
data['essay'].value_counts().plot(kind='bar',
figsize=(14,8),
title="essayFrequency",
ax=axes[16][0])
(data['essay'].value_counts(normalize=True) * 100).plot(kind='bar',
figsize=(14,8),
title="essayPercent",
ax=axes[16][1])
top = 10 # the top of the subplots of the figure
hspace = 1.2 # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top,hspace=hspace)
data = get_clean_data('quiz_data.csv',True)
# Per-program survey summary for 'chem': one grid row per survey column,
# raw counts on the left and percentage shares on the right.
data = get_clean_data('quiz_data.csv', True)
print("Summary of the Program: chem")
data = data[data.program == 'chem']
fig, axes = plt.subplots(nrows=17, ncols=2)
fig.suptitle('chem')

# Survey columns in the order they occupy the grid rows (top to bottom).
_questions = [
    'happy', 'problem_type', 'creative', 'outdoors', 'career', 'group_work',
    'liked_courses', 'disliked_courses', 'programming', 'join_clubs',
    'not_clubs', 'liked_projects', 'disliked_projects', 'tv_shows',
    'alternate_degree', 'expensive_equipment', 'essay',
]
for _row, _question in enumerate(_questions):
    _answers = data[_question]
    # Left column: raw number of responses per answer.
    _answers.value_counts().plot(
        kind='bar',
        figsize=(14,8),
        title=_question + "Frequency",
        ax=axes[_row][0],
    )
    # Right column: the same distribution expressed as a percentage.
    (_answers.value_counts(normalize=True) * 100).plot(
        kind='bar',
        figsize=(14,8),
        title=_question + "Percent",
        ax=axes[_row][1],
    )

# `top` is the top edge of the subplot area and `hspace` the vertical gap
# between rows; both are handed to subplots_adjust unchanged.
# NOTE(review): top=10 is far above the usual 0-1 range, presumably to give
# the 17 rows enough height in inline notebook output - confirm.
top, hspace = 10, 1.2
plt.subplots_adjust(top=top, hspace=hspace)

# Re-read the dataset so the global `data` no longer holds only 'chem' rows.
data = get_clean_data('quiz_data.csv', True)
# Per-program survey summary for 'syde': one grid row per survey column,
# raw counts on the left and percentage shares on the right.
data = get_clean_data('quiz_data.csv', True)
print("Summary of the Program: syde")
data = data[data.program == 'syde']
fig, axes = plt.subplots(nrows=17, ncols=2)
fig.suptitle('syde')

# Survey columns in the order they occupy the grid rows (top to bottom).
_questions = [
    'happy', 'problem_type', 'creative', 'outdoors', 'career', 'group_work',
    'liked_courses', 'disliked_courses', 'programming', 'join_clubs',
    'not_clubs', 'liked_projects', 'disliked_projects', 'tv_shows',
    'alternate_degree', 'expensive_equipment', 'essay',
]
for _row, _question in enumerate(_questions):
    _answers = data[_question]
    # Left column: raw number of responses per answer.
    _answers.value_counts().plot(
        kind='bar',
        figsize=(14,8),
        title=_question + "Frequency",
        ax=axes[_row][0],
    )
    # Right column: the same distribution expressed as a percentage.
    (_answers.value_counts(normalize=True) * 100).plot(
        kind='bar',
        figsize=(14,8),
        title=_question + "Percent",
        ax=axes[_row][1],
    )

# `top` is the top edge of the subplot area and `hspace` the vertical gap
# between rows; both are handed to subplots_adjust unchanged.
# NOTE(review): top=10 is far above the usual 0-1 range, presumably to give
# the 17 rows enough height in inline notebook output - confirm.
top, hspace = 10, 1.2
plt.subplots_adjust(top=top, hspace=hspace)

# Re-read the dataset so the global `data` no longer holds only 'syde' rows.
data = get_clean_data('quiz_data.csv', True)
# Per-program survey summary for 'msci': one grid row per survey column,
# raw counts on the left and percentage shares on the right.
data = get_clean_data('quiz_data.csv', True)
print("Summary of the Program: msci")
data = data[data.program == 'msci']
fig, axes = plt.subplots(nrows=17, ncols=2)
fig.suptitle('msci')

# Survey columns in the order they occupy the grid rows (top to bottom).
_questions = [
    'happy', 'problem_type', 'creative', 'outdoors', 'career', 'group_work',
    'liked_courses', 'disliked_courses', 'programming', 'join_clubs',
    'not_clubs', 'liked_projects', 'disliked_projects', 'tv_shows',
    'alternate_degree', 'expensive_equipment', 'essay',
]
for _row, _question in enumerate(_questions):
    _answers = data[_question]
    # Left column: raw number of responses per answer.
    _answers.value_counts().plot(
        kind='bar',
        figsize=(14,8),
        title=_question + "Frequency",
        ax=axes[_row][0],
    )
    # Right column: the same distribution expressed as a percentage.
    (_answers.value_counts(normalize=True) * 100).plot(
        kind='bar',
        figsize=(14,8),
        title=_question + "Percent",
        ax=axes[_row][1],
    )

# `top` is the top edge of the subplot area and `hspace` the vertical gap
# between rows; both are handed to subplots_adjust unchanged.
# NOTE(review): top=10 is far above the usual 0-1 range, presumably to give
# the 17 rows enough height in inline notebook output - confirm.
top, hspace = 10, 1.2
plt.subplots_adjust(top=top, hspace=hspace)

# Re-read the dataset so the global `data` no longer holds only 'msci' rows.
data = get_clean_data('quiz_data.csv', True)
# Per-program survey summary for 'elec': one grid row per survey column,
# raw counts on the left and percentage shares on the right.
data = get_clean_data('quiz_data.csv', True)
print("Summary of the Program: elec")
data = data[data.program == 'elec']
fig, axes = plt.subplots(nrows=17, ncols=2)
fig.suptitle('elec')

# Survey columns in the order they occupy the grid rows (top to bottom).
_questions = [
    'happy', 'problem_type', 'creative', 'outdoors', 'career', 'group_work',
    'liked_courses', 'disliked_courses', 'programming', 'join_clubs',
    'not_clubs', 'liked_projects', 'disliked_projects', 'tv_shows',
    'alternate_degree', 'expensive_equipment', 'essay',
]
for _row, _question in enumerate(_questions):
    _answers = data[_question]
    # Left column: raw number of responses per answer.
    _answers.value_counts().plot(
        kind='bar',
        figsize=(14,8),
        title=_question + "Frequency",
        ax=axes[_row][0],
    )
    # Right column: the same distribution expressed as a percentage.
    (_answers.value_counts(normalize=True) * 100).plot(
        kind='bar',
        figsize=(14,8),
        title=_question + "Percent",
        ax=axes[_row][1],
    )

# `top` is the top edge of the subplot area and `hspace` the vertical gap
# between rows; both are handed to subplots_adjust unchanged.
# NOTE(review): top=10 is far above the usual 0-1 range, presumably to give
# the 17 rows enough height in inline notebook output - confirm.
top, hspace = 10, 1.2
plt.subplots_adjust(top=top, hspace=hspace)

# Re-read the dataset so the global `data` no longer holds only 'elec' rows.
data = get_clean_data('quiz_data.csv', True)
# Per-program survey summary for 'nano': one grid row per survey column,
# raw counts on the left and percentage shares on the right.
data = get_clean_data('quiz_data.csv', True)
print("Summary of the Program: nano")
data = data[data.program == 'nano']
fig, axes = plt.subplots(nrows=17, ncols=2)
fig.suptitle('nano')

# Survey columns in the order they occupy the grid rows (top to bottom).
_questions = [
    'happy', 'problem_type', 'creative', 'outdoors', 'career', 'group_work',
    'liked_courses', 'disliked_courses', 'programming', 'join_clubs',
    'not_clubs', 'liked_projects', 'disliked_projects', 'tv_shows',
    'alternate_degree', 'expensive_equipment', 'essay',
]
for _row, _question in enumerate(_questions):
    _answers = data[_question]
    # Left column: raw number of responses per answer.
    _answers.value_counts().plot(
        kind='bar',
        figsize=(14,8),
        title=_question + "Frequency",
        ax=axes[_row][0],
    )
    # Right column: the same distribution expressed as a percentage.
    (_answers.value_counts(normalize=True) * 100).plot(
        kind='bar',
        figsize=(14,8),
        title=_question + "Percent",
        ax=axes[_row][1],
    )

# `top` is the top edge of the subplot area and `hspace` the vertical gap
# between rows; both are handed to subplots_adjust unchanged.
# NOTE(review): top=10 is far above the usual 0-1 range, presumably to give
# the 17 rows enough height in inline notebook output - confirm.
top, hspace = 10, 1.2
plt.subplots_adjust(top=top, hspace=hspace)

# Re-read the dataset so the global `data` no longer holds only 'nano' rows.
data = get_clean_data('quiz_data.csv', True)
# Per-program survey summary for 'geo': one grid row per survey column,
# raw counts on the left and percentage shares on the right.
data = get_clean_data('quiz_data.csv', True)
print("Summary of the Program: geo")
data = data[data.program == 'geo']
fig, axes = plt.subplots(nrows=17, ncols=2)
fig.suptitle('geo')

# Survey columns in the order they occupy the grid rows (top to bottom).
_questions = [
    'happy', 'problem_type', 'creative', 'outdoors', 'career', 'group_work',
    'liked_courses', 'disliked_courses', 'programming', 'join_clubs',
    'not_clubs', 'liked_projects', 'disliked_projects', 'tv_shows',
    'alternate_degree', 'expensive_equipment', 'essay',
]
for _row, _question in enumerate(_questions):
    _answers = data[_question]
    # Left column: raw number of responses per answer.
    _answers.value_counts().plot(
        kind='bar',
        figsize=(14,8),
        title=_question + "Frequency",
        ax=axes[_row][0],
    )
    # Right column: the same distribution expressed as a percentage.
    (_answers.value_counts(normalize=True) * 100).plot(
        kind='bar',
        figsize=(14,8),
        title=_question + "Percent",
        ax=axes[_row][1],
    )

# `top` is the top edge of the subplot area and `hspace` the vertical gap
# between rows; both are handed to subplots_adjust unchanged.
# NOTE(review): top=10 is far above the usual 0-1 range, presumably to give
# the 17 rows enough height in inline notebook output - confirm.
top, hspace = 10, 1.2
plt.subplots_adjust(top=top, hspace=hspace)

# Re-read the dataset so the global `data` no longer holds only 'geo' rows.
data = get_clean_data('quiz_data.csv', True)
# Per-program survey summary for 'env': one grid row per survey column,
# raw counts on the left and percentage shares on the right.
data = get_clean_data('quiz_data.csv', True)
print("Summary of the Program: env")
data = data[data.program == 'env']
fig, axes = plt.subplots(nrows=17, ncols=2)
fig.suptitle('env')

# Columns for grid rows 0-14, top to bottom; the statements that follow this
# loop continue filling the grid from row 15.
_questions = [
    'happy', 'problem_type', 'creative', 'outdoors', 'career', 'group_work',
    'liked_courses', 'disliked_courses', 'programming', 'join_clubs',
    'not_clubs', 'liked_projects', 'disliked_projects', 'tv_shows',
    'alternate_degree',
]
for _row, _question in enumerate(_questions):
    _answers = data[_question]
    # Left column: raw number of responses per answer.
    _answers.value_counts().plot(
        kind='bar',
        figsize=(14,8),
        title=_question + "Frequency",
        ax=axes[_row][0],
    )
    # Right column: the same distribution expressed as a percentage.
    (_answers.value_counts(normalize=True) * 100).plot(
        kind='bar',
        figsize=(14,8),
        title=_question + "Percent",
        ax=axes[_row][1],
    )
data['expensive_equipment'].value_counts().plot(kind='bar',
figsize=(14,8),
title="expensive_equipmentFrequency",
ax=axes[15][0])
(data['expensive_equipment'].value_counts(normalize=True) * 100).plot(kind='bar',
figsize=(14,8),
title="expensive_equipmentPercent",
ax=axes[15][1])
data['essay'].value_counts().plot(kind='bar',
figsize=(14,8),
title="essayFrequency",
ax=axes[16][0])
(data['essay'].value_counts(normalize=True) * 100).plot(kind='bar',
figsize=(14,8),
title="essayPercent",
ax=axes[16][1])
top = 10 # the top of the subplots of the figure
hspace = 1.2 # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top,hspace=hspace)
data = get_clean_data('quiz_data.csv',True)
# Summary for the 'arch-e' program: for every survey question below, draw the
# raw answer counts (left column) next to the same distribution expressed in
# percent (right column), one subplot row per question.
data = get_clean_data('quiz_data.csv', True)
print("Summary of the Program: arch-e")
data = data[data.program == 'arch-e']

# Questions to summarise, in top-to-bottom plotting order.
summary_columns = [
    'happy',
    'problem_type',
    'creative',
    'outdoors',
    'career',
    'group_work',
    'liked_courses',
    'disliked_courses',
    'programming',
    'join_clubs',
    'not_clubs',
    'liked_projects',
    'disliked_projects',
    'tv_shows',
    'alternate_degree',
    'expensive_equipment',
    'essay',
]

fig, axes = plt.subplots(nrows=len(summary_columns), ncols=2)
fig.suptitle('arch-e')
for row, column in enumerate(summary_columns):
    freq_ax, pct_ax = axes[row]
    counts = data[column].value_counts()
    counts.plot(kind='bar',
                figsize=(14, 8),
                title=column + "Frequency",
                ax=freq_ax)
    percents = data[column].value_counts(normalize=True) * 100
    percents.plot(kind='bar',
                  figsize=(14, 8),
                  title=column + "Percent",
                  ax=pct_ax)

# Layout values handed to subplots_adjust: `top` is the upper edge of the
# subplot grid and `hspace` the vertical gap reserved between rows.
top = 10
hspace = 1.2
plt.subplots_adjust(top=top, hspace=hspace)

# Rebind `data` to a fresh load so it is no longer the 'arch-e'-only subset.
data = get_clean_data('quiz_data.csv', True)
# Summary for the 'arch' program: for every survey question below, draw the
# raw answer counts (left column) next to the same distribution expressed in
# percent (right column), one subplot row per question.
data = get_clean_data('quiz_data.csv', True)
print("Summary of the Program: arch")
data = data[data.program == 'arch']

# Questions to summarise, in top-to-bottom plotting order.
summary_columns = [
    'happy',
    'problem_type',
    'creative',
    'outdoors',
    'career',
    'group_work',
    'liked_courses',
    'disliked_courses',
    'programming',
    'join_clubs',
    'not_clubs',
    'liked_projects',
    'disliked_projects',
    'tv_shows',
    'alternate_degree',
    'expensive_equipment',
    'essay',
]

fig, axes = plt.subplots(nrows=len(summary_columns), ncols=2)
fig.suptitle('arch')
for row, column in enumerate(summary_columns):
    freq_ax, pct_ax = axes[row]
    counts = data[column].value_counts()
    counts.plot(kind='bar',
                figsize=(14, 8),
                title=column + "Frequency",
                ax=freq_ax)
    percents = data[column].value_counts(normalize=True) * 100
    percents.plot(kind='bar',
                  figsize=(14, 8),
                  title=column + "Percent",
                  ax=pct_ax)

# Layout values handed to subplots_adjust: `top` is the upper edge of the
# subplot grid and `hspace` the vertical gap reserved between rows.
top = 10
hspace = 1.2
plt.subplots_adjust(top=top, hspace=hspace)

# Rebind `data` to a fresh load so it is no longer the 'arch'-only subset.
data = get_clean_data('quiz_data.csv', True)
# "program vs. <question>" charts: for each survey question, plot the
# "percent" column produced by normalize_1_variables against program, with
# one facet (stacked vertically via col_wrap=1) per answer to the question.
comparison_columns = [
    "happy",
    "problem_type",
    "creative",
    # "industry",  # this chart was commented out in the notebook; kept disabled
    "outdoors",
    "career",
    "group_work",
    "liked_courses",
    "disliked_courses",
    "programming",
    "join_clubs",
    "not_clubs",
    "liked_projects",
    "disliked_projects",
    "tv_shows",
    "alternate_degree",
    "expensive_equipment",
    "drawing",
    "essay",
]

for column in comparison_columns:
    print("program vs. " + column)
    plot_data = normalize_1_variables(data, "program", column)
    g = sns.catplot(
        y="percent",
        x="program",
        order=data.program.unique(),
        col=column,
        data=plot_data,
        kind='bar',
        height=10,
        aspect=2.5,
        col_wrap=1,
        margin_titles=True,
    )
    g.set_xlabels('')
    g.set_ylabels('percent')
    # Make the program names visible (and slanted) under every facet.
    for ax in g.axes:
        plt.setp(ax.get_xticklabels(), visible=True, rotation=45, label='big')
    plt.subplots_adjust(hspace=0.3)
    plt.show()
import numpy as np
import pandas as pd
from sklearn.naive_bayes import MultinomialNB
from data_load import get_encoded_data
import json
import pandas as pd
import numpy as np
from sklearn import preprocessing
import pickle
# Naive Bayes demo on the golf data set: label-encode every column, fit a
# MultinomialNB classifier, pickle the model, then reload it and score one
# hand-written example.
directory = 'golf_data.csv'
df = pd.read_csv(directory, dtype=str)
# Discard the first and seventh columns of the CSV before modelling.
df = df.drop(df.columns[[0, 6]], axis=1)
print(df.head())

# Hand-written value -> integer code tables used to encode the example below.
# NOTE(review): these must agree with the LabelEncoder mappings printed as
# `encoded_dict_list` further down; re-check them if the data set changes.
outlook = {'Rainy': 1, 'Overcast': 0, 'Sunny': 2, 'column': 'OUTLOOK'}
temperature = {'Hot': 1, 'Mild': 2, 'Cool': 0, 'column': 'TEMPERATURE'}
humidity = {'High': 0, 'Normal': 1, 'column': 'HUMIDITY'}
windy = {'FALSE': 0, 'TRUE': 1, 'column': 'WINDY'}

# Replace every column by its integer encoding and record, per column, the
# original value -> code mapping (plus the column name under 'column').
col_list = list(df.columns)
encoded_dict_list = []
for col in col_list:
    keys = df[col].unique()
    le = preprocessing.LabelEncoder()
    le.fit(list(keys))
    df[col] = le.transform(list(df[col]))
    vals = df[col].unique()
    keys = list(le.inverse_transform(vals))
    cd = dict(zip(keys, vals))
    cd['column'] = col
    encoded_dict_list.append(cd)
print(encoded_dict_list)
print(df.head())

# Features are every column except the target "PLAY".
x_df = df.drop(columns=["PLAY"])
y_df = df["PLAY"]
X = np.array(x_df)  # convert dataframe into np array
y = np.array(y_df)  # convert dataframe into np array

mnb = MultinomialNB()
model = mnb.fit(x_df, y_df)  # fit the model using training data

# Feature name -> column position in the training matrix.
cat = df.drop('PLAY', axis=1)
index_dict = dict(zip(cat.columns, range(cat.shape[1])))

with open('nb_model.pkl', 'wb') as fid:
    pickle.dump(model, fid, 2)

# We need to create our feature vector with exactly the same dimension as our
# training set. To convert user input into dummy variables, we save a dict of
# the dummy variables; later the feature vector used for prediction can be
# populated from this dict.
with open('cat', 'wb') as fid:
    pickle.dump(index_dict, fid, 2)

# Example request to score.
post_dict = {
    'OUTLOOK': 'Overcast',
    'TEMPERATURE': 'Cool',
    'HUMIDITY': 'Normal',
    'WINDY': 'FALSE'
}

# Build the single-row feature matrix from scratch. (Previously the label
# array `y` was reused as scratch space, which overwrote its first four
# training labels and required at least four rows of data.)
# NOTE(review): the order below assumes the feature columns are OUTLOOK,
# TEMPERATURE, HUMIDITY, WINDY -- confirm against `index_dict`.
new_vector = [[
    outlook[post_dict['OUTLOOK']],
    temperature[post_dict['TEMPERATURE']],
    humidity[post_dict['HUMIDITY']],
    windy[post_dict['WINDY']],
]]
print(new_vector)

print("Loading model")
# Context manager so the file handle is closed once the model is loaded.
# The pickle read here is the file written above; never unpickle untrusted data.
with open('nb_model.pkl', 'rb') as pkl_file:
    nb_model = pickle.load(pkl_file)

prediction = nb_model.predict(new_vector)
# predict() returns one label per input row; inspect the only row explicitly.
if prediction[0] == 0:
    response_message = 'You should not play golf today'
    rm = 'NO'
else:
    response_message = 'You could play golf today'
    rm = 'YES'
print(rm)

prediction = nb_model.predict_proba(new_vector)
print(prediction)
# First index is probability of no, second index is probability of yes
prediction = nb_model.predict_log_proba(new_vector)
print(prediction)
# Toggle Code
import ipywidgets as widgets
from IPython.display import display, HTML
# Lookup tables keyed by the toggle state (True / False).
# jQuery call that toggle_code() applies to the "div.input" elements.
javascript_functions = {False: "hide()", True: "show()"}
# Caption that button_action() puts on the toggle button for each state.
button_descriptions = {False: "Show code", True: "Hide code"}
def toggle_code(state):
    """
    Show or hide the notebook's ``div.input`` elements.

    Looks up the jQuery call for ``state`` in ``javascript_functions``,
    wraps it in a ``<script>`` tag and displays it as HTML so the browser
    runs it.
    """
    js_call = javascript_functions[state]
    script_tag = "<script>$(\"div.input\").{}</script>".format(js_call)
    display(HTML(script_tag))
def button_action(value):
    """
    Observer callback for the toggle button.

    ``value`` is the change notification: ``value.new`` holds the new
    button state and ``value.owner`` the widget that changed. Applies the
    new state via toggle_code() and then updates the button caption.
    """
    pressed = value.new
    toggle_code(pressed)
    caption = button_descriptions[pressed]
    value.owner.description = caption
# Initial state is False: apply it right away (javascript_functions maps
# False to "hide()"), then build the toggle button with the matching caption.
state = False
toggle_code(state)
button = widgets.ToggleButton(state, description = button_descriptions[state])
# Call button_action whenever the button's "value" trait changes.
button.observe(button_action, "value")
display(button)